library(tidyverse)
library(tidylog)

### TWEETS
# Restore the tweet data. load() recreates whatever objects were saved in the
# .Rdata file; the code further down expects it to provide `MPtweets`.
load("data/analysis/MPtweetsv2.Rdata")

# MP-level inputs: `mp` is the panel (used below through about, date,
# fff_event, category), `mpcs` the cross-section (about, majority_17).
mp <- read_csv("data/analysis/MP_panelv2.csv")
mpcs <- read_csv("data/analysis/MP_cs_all.csv")

## Date of the first protest per `about`, taken from the panel
# Only panel rows with fff_event == 1 are considered; the earliest `date`
# among them becomes `firstdate`. An `about` with no such row does not appear
# in `firstprot` at all. Result: one row per `about` (about, firstdate).
firstprot <- mp %>%
  filter(fff_event == 1) %>%
  group_by(about) %>%
  summarise(firstdate = min(date))

# Flag tweets dated on or after the first protest date of their `about`.
# `firstprot` holds exactly one row per `about`, so the join alone places the
# same `firstdate` on every tweet of that MP; a grouped fill() adds nothing
# and is omitted. The comparison is NA when `firstdate` is missing (MP not in
# `firstprot`) or the tweet date is missing; coalesce() codes those rows 0.
twts <- MPtweets %>%
  left_join(firstprot, by = "about") %>%
  mutate(fff_post = coalesce(as.numeric(date >= firstdate), 0)) %>%
  select(-firstdate)

# add in time-varying positions
# `category` comes from the panel row with the same (about, date) as the tweet.
mpcsp <- mp %>%
  select(about, category, date)

# frontbench = 1 when the matched category is one of the four roles listed
# below, 0 otherwise. %in% yields FALSE (never NA) for a missing category,
# e.g. when no panel row matched, so those tweets are coded 0 directly.
twts <- twts %>%
  left_join(mpcsp, by = c("about", "date")) %>%
  mutate(frontbench = as.numeric(
    category %in% c("minister", "speaker",
                    "whip and minister", "shadow minister")
  ))

# add in electoral majorities data
# `majority_17` is looked up in the cross-section by `about` only.
mpcsmaj <- select(mpcs, about, majority_17)

# Derived variables:
#   majority_17_logged  log(1 + majority_17)
#   narrow_win          1 when majority_17 < 5, else 0 (NA if majority missing)
#   lab1_con0           1 for Labour / Labour (Co-op), 0 for Conservative,
#                       NA for any other party_value
twts <- twts %>%
  left_join(mpcsmaj, by = "about") %>%
  mutate(
    majority_17_logged = log(1 + majority_17),
    narrow_win = if_else(majority_17 < 5, 1, 0),
    lab1_con0 = case_when(
      party_value %in% c("Labour", "Labour (Co-op)") ~ 1,
      party_value == "Conservative" ~ 0
    )
  )

# add post-GS and gender indicator variables
#   female  1L when gender_value is "Female", 0L otherwise, NA if missing
#   postgs  1L for tweets dated on or after 2019-03-15, 0L before
# `date` is then moved to the first column and the corpus is written to disk.
twts_corpus <- twts %>%
  mutate(
    female = as.integer(gender_value == "Female"),
    postgs = as.integer(date >= "2019-03-15")
  ) %>%
  select(date, everything())

saveRDS(object = twts_corpus, file = "data/analysis/MPtweets_corpus.rds")



### SPEECHES
# Restore the speech data; the code below expects load() to provide `speeches`.
load("data/analysis/MPspeeches.Rdata")

# Harmonise with the tweet/panel layout: `speech_date` becomes `date` and is
# placed first, and `about` is coerced to numeric so it can be used as a join
# key against `firstprot` and `mp` below.
speeches <- speeches %>%
  select(date = speech_date, everything()) %>%
  mutate(about = as.numeric(about))


# Flag speeches dated on or after the first protest date of their `about`.
# `firstprot` holds exactly one row per `about`, so the join alone places the
# same `firstdate` on every speech of that MP; a grouped fill() adds nothing
# and is omitted. The comparison is NA when `firstdate` is missing (MP not in
# `firstprot`) or the speech date is missing; coalesce() codes those rows 0.
spchs <- speeches %>%
  left_join(firstprot, by = "about") %>%
  mutate(fff_post = coalesce(as.numeric(date >= firstdate), 0)) %>%
  select(-firstdate)


# add in time-varying and cross-sectional covariates
# All five covariates are taken from the panel row with the same
# (about, date) as the speech.
# NOTE(review): majority_17, gender and party_value are therefore NA for any
# speech without a matching panel row, whereas the tweets branch looks up
# majority_17 by `about` alone -- confirm this difference is intended.
mpcsp <- mp %>%
  select(about, category, date,
         majority_17, gender, party_value)

# frontbench = 1 when the matched category is one of the four roles listed
# below, 0 otherwise. %in% yields FALSE (never NA) for a missing category,
# e.g. when no panel row matched, so those speeches are coded 0 directly.
spchs <- spchs %>%
  left_join(mpcsp, by = c("about", "date")) %>%
  mutate(frontbench = as.numeric(
    category %in% c("minister", "speaker",
                    "whip and minister", "shadow minister")
  ))

# derive majority- and party-based variables from the majority_17 and
# party_value columns already present on `spchs`
#   majority_17_logged  log(1 + majority_17)
#   narrow_win          1 when majority_17 < 5, else 0 (NA if majority missing)
#   lab1_con0           1 for Labour / Labour (Co-op), 0 for Conservative,
#                       NA for any other party_value
spchs <- spchs %>%
  mutate(
    majority_17_logged = log(1 + majority_17),
    narrow_win = if_else(majority_17 < 5, 1, 0),
    lab1_con0 = case_when(
      party_value %in% c("Labour", "Labour (Co-op)") ~ 1,
      party_value == "Conservative" ~ 0
    )
  )

# add post-GS and gender indicator variables
#   female  1L when gender is "Female", 0L otherwise, NA if missing
#   postgs  1L for speeches dated on or after 2019-03-15, 0L before
# `date` is then moved to the first column and the corpus is written to disk.
spchs_corpus <- spchs %>%
  mutate(
    female = as.integer(gender == "Female"),
    postgs = as.integer(date >= "2019-03-15")
  ) %>%
  select(date, everything())

saveRDS(object = spchs_corpus, file = "data/analysis/MPspeeches_corpus.rds")
